% Simulated avalanches, aggregated at daily frequency
% October 2022
% Makoto Nirei

% require simulatation data generated by simulations.m

clearvars
load simu.mat piv calvot pilength nump dlogP mu n Z zeta eta vp rho rm a delta delta_u tol_w findq alpha_U gamma_U; % master simulation data; pi=0.03
i_pi=2; % index for pi=0.03
pi=piv(i_pi);
get_equilibrium;

day_time=1/365;
maxday=floor(calvot(Z)/day_time);

edges=(0:maxday)*day_time;
[calvod,edges1]=histcounts(calvot, edges);
calvod=cumsum(calvod');

dlogPd=zeros(maxday,1);  Zdp=zeros(maxday,1); Zp=Zdp; Zdn=Zdp; Zn=Zdn;
dlogPd(1)=sum(dlogP(1:calvod(1),i_pi)); % daily price increase
idld=(nump>0); % indicator for price rise
%Zdp:  total adjusting firms in a day including Calvos
Zdp(1)=sum( nump(1:calvod(1), i_pi).* idld(1:calvod(1),i_pi));
Zp(1)= sum(idld(1:calvod(1),i_pi));
Zdn(1)=sum( nump(1:calvod(1), i_pi).* (~idld(1:calvod(1),i_pi)));
Zn(1)= sum(~(idld(1:calvod(1),i_pi)));

for i=2:maxday
    dlogPd(i,1)=sum(dlogP(calvod(i-1)+1:calvod(i), i_pi));
    Zdp(i,1)=sum( nump(calvod(i-1)+1:calvod(i), i_pi).*idld(calvod(i-1)+1:calvod(i),i_pi)); % This L inculdes Calvo
    Zp(i,1)= sum(idld(calvod(i-1)+1:calvod(i),i_pi));
    Zdn(i,1)=sum( nump(calvod(i-1)+1:calvod(i),i_pi).*(~idld(calvod(i-1)+1:calvod(i),i_pi))); % This L inculdes Calvo
    Zn(i,1)= sum(~(idld(calvod(i-1)+1:calvod(i),i_pi)));
end

mu_hat=mu*day_time*n;

LLp=nump(nump(:,i_pi)>0,i_pi)-1; % each positive avalanche (excluding calvo)
LLn=nump(nump(:,i_pi)<0,i_pi)-1; % each negative avalanche (excluding calvo)

%% fitting GPD

yo=LLp+1; % avalanche size + 1
%yo2=reshape(yo(1:5*floor(length(yo)/5)),5, floor(length(yo)/5)); % 5 series of skipped data
%y=yo2(1,:)'; % 1st series of skipped data
m=max(yo);

% null hypothesis -- model theta and theta0
for i=1:2
    theta_tmp(i) = vp(q(i),1-eta)/vp(q(i),mu/pi) *pb(i)^(1-eta);
    lambda_1(i) = vp(pt(i),1-eta);
    lambda_2(i) = 1/vp(q(i),mu/pi);
end
theta=mean(theta_tmp);
lambda = mu/(1-theta) *mean(lambda_2)*mean(lambda_1);
theta0=lambda*(1-theta)/mu;
th_nul=theta;
t0_nul=theta0;

% time-series stats of L (footnote 21)
disp('unconditional mean of L');
disp(mean(LLp));
disp('conditional on L(i-1)>0');
disp(mean(LLp(find(LLp(1:end-1)>0)+1)));
disp('autocorrelation');
disp(corrcoef(LLp(2:end),LLp(1:end-1)));

% Moment estimators for (theta0, theta), Consul p.170
% Fitting GPD to LL2; using no-skipped data
nna=length(yo);
t0_mma=sqrt(mean(yo-1)^3/ var(yo-1)); % Moment estimator for theta0
th_mma=1-sqrt(mean(yo-1)/var(yo-1));  % Moment estimator for theta
v_th_mma=((1-th_nul)/2/nna/t0_nul)...
    *(t0_nul - t0_nul*th_nul + 2*th_nul+3*t0_nul^2); % var(hat theta)
b_th_mma=-1/(4*nna*t0_nul)*(5*t0_nul*(1-th_nul)+t0_nul*(10+9*t0_nul^2));    % bias(hat theta)
disp('theta0 mm, hat theta, std hat theta, bias hat theta');
disp([t0_mma th_mma sqrt(v_th_mma) b_th_mma]);

tv=[t0_mma; th_mma];
pGP=zeros(m+2,1); ccGP=zeros(m+1,1);
yh=zeros(m+2,1); cyh=zeros(m+1,1);
pGP(1)=exp(-tv(1));         % probability distribution of GP. Note that pGP(1) is the prob of 0.
for i=1:m
    pGP(i+1)=pGP(i)/i/exp(tv(2)) *((tv(2)*i+tv(1))/(tv(2)*(i-1)+tv(1)))^(i-1)*(tv(2)*(i-1)+tv(1));
end
pGP(m+2)=1-sum(pGP(1:m+1));
ccGP(m+1)=pGP(m+2);         % counter-cumulative distribution funciton of GP
for i=1:m
    ccGP(m+1-i)=ccGP(m+2-i)+pGP(m+2-i);
end

% plot distribution of non-skipped avalanches
yo_zeros=sum(yo==1);
sort_y=sort(yo(yo>1)-1);
freq_cc=[length(yo)-yo_zeros:-1:1]/length(yo);
figure(5)
loglog(sort_y,freq_cc,'o');
hold;
loglog(1:m+1,ccGP,'--','LineWidth',2);
hold off;
legend('Model', 'Genaralized Poisson','Location','Southwest');
xlabel('L');
ylabel('Counter-cumulative distribution function');
ax=gca;
ax.FontSize=16;
ax.YLim=[10^(-7) 10^0];
ax.XLim=[10^0 3*10^3];
grid; 
grid minor;

ind_ccd=zeros(m-1,1); % m=max(yo)=max(sort_y+1)
for i=1:m-1
    ind_ccd(i)=sum(sort_y<i)+1;
end
emp_ccd=freq_cc(ind_ccd)';
CV_ccd=sqrt(mean((log([1; emp_ccd(1:m-1)])-log([1; ccGP(1:m-1)])).^2))/abs(mean(log([1; emp_ccd(1:m-1)])));
disp('MSE(log emp_ccd - log ccGP)/abs(mean(log emp_ccd))');
disp(CV_ccd);

% plot for subsamples
thhat_n=500;
i_length=floor(length(yo)/thhat_n);
thhat=zeros(i_length,1); t0hat=thhat; v_thhat=thhat;
for i=1:i_length
    data=yo(thhat_n*(i-1)+1:thhat_n*i,:);
    thhat(i)=1-sqrt(1/(var(data-1)/mean(data-1)));  % ME for theta
    t0hat(i)=sqrt(mean(yo(thhat_n*(i-1)+1:thhat_n*i,:)-1)^3/ var(yo(thhat_n*(i-1)+1:thhat_n*i,:)-1)); % ME for theta0
end
figure(6), hist(thhat,100);
xlabel('Estimated \theta');
ax=gca;
ax.FontSize=20;

v_thhat=((1-th_nul)/2/thhat_n/t0_nul)*(t0_nul-t0_nul*th_nul+2*th_nul+3*t0_nul^2);  % var(hat theta) under the null    
disp('s.e. of hat theta');
disp(sqrt(v_thhat));

disp('model theta0 and theta');
disp([t0_nul th_nul]);

% compute V(L)/E(L)
EL=theta0/(1-theta);
tmp_s2a=zeros(1,2); tmp_vq=zeros(1,2);
for i=1:2
    tmp_s2a(i)= (pb(i)^(1-eta) * vp(q(i),1-eta))^2 / vp(q(i),mu/pi);
    tmp_vq(i) = (pb(i)^(1-eta)/2)^2 *( vp(q(i),(1-eta)*2+mu/pi) / vp(q(i),mu/pi) ...
                 - ( vp(q(i), 1-eta +mu/pi) / vp(q(i),mu/pi) )^2 ); 
end
s2th_a = (lambda/pi)* mean(tmp_s2a) - theta^2;
s2th_0 = sum(tmp_vq) * (lambda/pi/(1-eta))^2;
VL = theta0*(1+s2th_a)/(1-theta)^3 + s2th_0/(1-theta)^2;
CVL = VL/EL;
LowerBoundCVL = 1/(1-theta)^2;
disp('CV(L) lowerbound lowerbound/CV(L)');
disp([CVL LowerBoundCVL LowerBoundCVL/CVL]);

%% Daily avalanches

disp_Z = var(Zdp)/mean(Zdp);
endo_Z = var(LLp)/(mean(LLp)+1);
exog_Z = mean(Zdp)/(mu_hat*(0.5+0.5*(1-zeta)));
disp('LHS RHS1 RHS2 RHS2/LHS');
disp([disp_Z endo_Z exog_Z exog_Z/disp_Z]);

t0_Z=sqrt(mean(Zdp)^3/ var(Zdp)); % ME for theta0
th_Z=1-sqrt(mean(Zdp)/var(Zdp));  % ME for theta
disp('theta0 mm, hat theta');
disp([t0_Z th_Z]);

% fit by GPD

% nna=length(yo);
% t0_mma=sqrt(mean(yo-1)^3/ var(yo-1)); % Moment estimator for theta0
% th_mma=1-sqrt(mean(yo-1)/var(yo-1));  % Moment estimator for theta
% v_th_mma=((1-th_nul)/2/nna/t0_nul)...
%     *(t0_nul - t0_nul*th_nul + 2*th_nul+3*t0_nul^2); % var(hat theta)
% b_th_mma=-1/(4*nna*t0_nul)*(5*t0_nul*(1-th_nul)+t0_nul*(10+9*t0_nul^2));    % bias(hat theta)
% disp('theta0 mm, hat theta, std hat theta, bias hat theta');
% disp([t0_mma th_mma sqrt(v_th_mma) b_th_mma]);

tvZ=[t0_Z; th_Z];

mZ=max(Zdp)+1;
pGPZ=zeros(mZ+2,1); ccGPZ=zeros(mZ+1,1);
yhZ=zeros(mZ+2,1); cyhZ=zeros(mZ+1,1);
pGPZ(1)=exp(-tvZ(1));         % probability distribution of GP. Note that pGP(1) is the prob of 0.
for i=1:mZ
    pGPZ(i+1)=pGPZ(i)/i/exp(tvZ(2)) *((tvZ(2)*i+tvZ(1))/(tvZ(2)*(i-1)+tvZ(1)))^(i-1)*(tvZ(2)*(i-1)+tvZ(1));
end
pGPZ(mZ+2)=1-sum(pGPZ(1:mZ+1));
ccGPZ(mZ+1)=pGPZ(mZ+2);         % counter-cumulative distribution funciton of GP
for i=1:mZ
    ccGPZ(mZ+1-i)=ccGPZ(mZ+2-i)+pGPZ(mZ+2-i);
end

% plot distribution of non-skipped avalanches
Zdp_zeros=sum(Zdp==1);
sort_Zdp=sort(Zdp(Zdp>1)-1);
freq_ccZ=[length(Zdp)-Zdp_zeros:-1:1]/length(Zdp);
figure(17)
loglog(sort_Zdp,freq_ccZ(1:length(sort_Zdp)),'o');
hold;
loglog(1:mZ+1,ccGPZ,'--','LineWidth',2);
hold off;
legend('Model', 'Genaralized Poisson','Location','Southwest');
xlabel('Daily sum of avalanches');
ylabel('Counter-cumulative distribution function');
ax=gca;
ax.FontSize=16;
ax.YLim=[10^(-7) 10^0];
ax.XLim=[10^0 3*10^3];
grid; 
grid minor;

ind_ccdZ=zeros(mZ-1,1); % m=max(yo)=max(sort_y+1)
for i=1:mZ-1
    ind_ccdZ(i)=sum(sort_Zdp<i)+1;
end
emp_ccdZ=freq_ccZ(ind_ccdZ)';
CV_ccdZ=sqrt(mean((log([1; emp_ccdZ(1:mZ-1)])-log([1; ccGPZ(1:mZ-1)])).^2))/abs(mean(log([1; emp_ccdZ(1:mZ-1)])));
disp('MSE(log emp_ccdZ - log ccGPZ)/abs(mean(log emp_ccdZ))');
disp(CV_ccdZ);
